import pandas as pd
# Load the data set; the first two lines of the CSV file are comment lines
# and are skipped.
daten = pd.read_csv('metall_oder_stein.csv', skiprows=2)

# Structural overview: column names, dtypes and non-null counts.
daten.info()

# Completeness check: number of missing values per column.
fehlende_werte = daten.isnull().sum()
print(fehlende_werte)

# Peek at the first ten rows (displayed when this is the last expression of a
# notebook cell).
daten.head(10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208 entries, 0 to 207
Data columns (total 61 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Signal01 208 non-null float64
1 Signal02 208 non-null float64
2 Signal03 208 non-null float64
3 Signal04 208 non-null float64
4 Signal05 208 non-null float64
5 Signal06 208 non-null float64
6 Signal07 208 non-null float64
7 Signal08 208 non-null float64
8 Signal09 208 non-null float64
9 Signal10 208 non-null float64
10 Signal11 208 non-null float64
11 Signal12 208 non-null float64
12 Signal13 208 non-null float64
13 Signal14 208 non-null float64
14 Signal15 208 non-null float64
15 Signal16 208 non-null float64
16 Signal17 208 non-null float64
17 Signal18 208 non-null float64
18 Signal19 208 non-null float64
19 Signal20 208 non-null float64
20 Signal21 208 non-null float64
21 Signal22 208 non-null float64
22 Signal23 208 non-null float64
23 Signal24 208 non-null float64
24 Signal25 208 non-null float64
25 Signal26 208 non-null float64
26 Signal27 208 non-null float64
27 Signal28 208 non-null float64
28 Signal29 208 non-null float64
29 Signal30 208 non-null float64
30 Signal31 208 non-null float64
31 Signal32 208 non-null float64
32 Signal33 208 non-null float64
33 Signal34 208 non-null float64
34 Signal35 208 non-null float64
35 Signal36 208 non-null float64
36 Signal37 208 non-null float64
37 Signal38 208 non-null float64
38 Signal39 208 non-null float64
39 Signal40 208 non-null float64
40 Signal41 208 non-null float64
41 Signal42 208 non-null float64
42 Signal43 208 non-null float64
43 Signal44 208 non-null float64
44 Signal45 208 non-null float64
45 Signal46 208 non-null float64
46 Signal47 208 non-null float64
47 Signal48 208 non-null float64
48 Signal49 208 non-null float64
49 Signal50 208 non-null float64
50 Signal51 208 non-null float64
51 Signal52 208 non-null float64
52 Signal53 208 non-null float64
53 Signal54 208 non-null float64
54 Signal55 208 non-null float64
55 Signal56 208 non-null float64
56 Signal57 208 non-null float64
57 Signal58 208 non-null float64
58 Signal59 208 non-null float64
59 Signal60 208 non-null float64
60 Material 208 non-null object
dtypes: float64(60), object(1)
memory usage: 99.3+ KB
Signal01 0
Signal02 0
Signal03 0
Signal04 0
Signal05 0
..
Signal57 0
Signal58 0
Signal59 0
Signal60 0
Material 0
Length: 61, dtype: int64
| | Signal01 | Signal02 | Signal03 | Signal04 | Signal05 | Signal06 | Signal07 | Signal08 | Signal09 | Signal10 | ... | Signal52 | Signal53 | Signal54 | Signal55 | Signal56 | Signal57 | Signal58 | Signal59 | Signal60 | Material |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0200 | 0.0371 | 0.0428 | 0.0207 | 0.0954 | 0.0986 | 0.1539 | 0.1601 | 0.3109 | 0.2111 | ... | 0.0027 | 0.0065 | 0.0159 | 0.0072 | 0.0167 | 0.0180 | 0.0084 | 0.0090 | 0.0032 | Stein |
| 1 | 0.0453 | 0.0523 | 0.0843 | 0.0689 | 0.1183 | 0.2583 | 0.2156 | 0.3481 | 0.3337 | 0.2872 | ... | 0.0084 | 0.0089 | 0.0048 | 0.0094 | 0.0191 | 0.0140 | 0.0049 | 0.0052 | 0.0044 | Stein |
| 2 | 0.0262 | 0.0582 | 0.1099 | 0.1083 | 0.0974 | 0.2280 | 0.2431 | 0.3771 | 0.5598 | 0.6194 | ... | 0.0232 | 0.0166 | 0.0095 | 0.0180 | 0.0244 | 0.0316 | 0.0164 | 0.0095 | 0.0078 | Stein |
| 3 | 0.0100 | 0.0171 | 0.0623 | 0.0205 | 0.0205 | 0.0368 | 0.1098 | 0.1276 | 0.0598 | 0.1264 | ... | 0.0121 | 0.0036 | 0.0150 | 0.0085 | 0.0073 | 0.0050 | 0.0044 | 0.0040 | 0.0117 | Stein |
| 4 | 0.0762 | 0.0666 | 0.0481 | 0.0394 | 0.0590 | 0.0649 | 0.1209 | 0.2467 | 0.3564 | 0.4459 | ... | 0.0031 | 0.0054 | 0.0105 | 0.0110 | 0.0015 | 0.0072 | 0.0048 | 0.0107 | 0.0094 | Stein |
| 5 | 0.0286 | 0.0453 | 0.0277 | 0.0174 | 0.0384 | 0.0990 | 0.1201 | 0.1833 | 0.2105 | 0.3039 | ... | 0.0045 | 0.0014 | 0.0038 | 0.0013 | 0.0089 | 0.0057 | 0.0027 | 0.0051 | 0.0062 | Stein |
| 6 | 0.0317 | 0.0956 | 0.1321 | 0.1408 | 0.1674 | 0.1710 | 0.0731 | 0.1401 | 0.2083 | 0.3513 | ... | 0.0201 | 0.0248 | 0.0131 | 0.0070 | 0.0138 | 0.0092 | 0.0143 | 0.0036 | 0.0103 | Stein |
| 7 | 0.0519 | 0.0548 | 0.0842 | 0.0319 | 0.1158 | 0.0922 | 0.1027 | 0.0613 | 0.1465 | 0.2838 | ... | 0.0081 | 0.0120 | 0.0045 | 0.0121 | 0.0097 | 0.0085 | 0.0047 | 0.0048 | 0.0053 | Stein |
| 8 | 0.0223 | 0.0375 | 0.0484 | 0.0475 | 0.0647 | 0.0591 | 0.0753 | 0.0098 | 0.0684 | 0.1487 | ... | 0.0145 | 0.0128 | 0.0145 | 0.0058 | 0.0049 | 0.0065 | 0.0093 | 0.0059 | 0.0022 | Stein |
| 9 | 0.0164 | 0.0173 | 0.0347 | 0.0070 | 0.0187 | 0.0671 | 0.1056 | 0.0697 | 0.0962 | 0.0251 | ... | 0.0090 | 0.0223 | 0.0179 | 0.0084 | 0.0068 | 0.0032 | 0.0035 | 0.0056 | 0.0040 | Stein |
10 rows × 61 columns
# Inspect which class labels occur in the target column.
daten['Material'].unique()

# Encode the class labels numerically: 'Stein' -> 0, 'Metall' -> 1.
# The labels are first replaced by digit strings and then cast to integers.
kodierung = {'Stein': '0', 'Metall': '1'}
material_kodiert = daten['Material'].replace(kodierung)
daten['Material'] = material_kodiert.astype(int)

# Summary statistics of all (now purely numeric) columns.
daten.describe()
| | Signal01 | Signal02 | Signal03 | Signal04 | Signal05 | Signal06 | Signal07 | Signal08 | Signal09 | Signal10 | ... | Signal52 | Signal53 | Signal54 | Signal55 | Signal56 | Signal57 | Signal58 | Signal59 | Signal60 | Material |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | ... | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 | 208.000000 |
| mean | 0.029164 | 0.038437 | 0.043832 | 0.053892 | 0.075202 | 0.104570 | 0.121747 | 0.134799 | 0.178003 | 0.208259 | ... | 0.013420 | 0.010709 | 0.010941 | 0.009290 | 0.008222 | 0.007820 | 0.007949 | 0.007941 | 0.006507 | 0.533654 |
| std | 0.022991 | 0.032960 | 0.038428 | 0.046528 | 0.055552 | 0.059105 | 0.061788 | 0.085152 | 0.118387 | 0.134416 | ... | 0.009634 | 0.007060 | 0.007301 | 0.007088 | 0.005736 | 0.005785 | 0.006470 | 0.006181 | 0.005031 | 0.500070 |
| min | 0.001500 | 0.000600 | 0.001500 | 0.005800 | 0.006700 | 0.010200 | 0.003300 | 0.005500 | 0.007500 | 0.011300 | ... | 0.000800 | 0.000500 | 0.001000 | 0.000600 | 0.000400 | 0.000300 | 0.000300 | 0.000100 | 0.000600 | 0.000000 |
| 25% | 0.013350 | 0.016450 | 0.018950 | 0.024375 | 0.038050 | 0.067025 | 0.080900 | 0.080425 | 0.097025 | 0.111275 | ... | 0.007275 | 0.005075 | 0.005375 | 0.004150 | 0.004400 | 0.003700 | 0.003600 | 0.003675 | 0.003100 | 0.000000 |
| 50% | 0.022800 | 0.030800 | 0.034300 | 0.044050 | 0.062500 | 0.092150 | 0.106950 | 0.112100 | 0.152250 | 0.182400 | ... | 0.011400 | 0.009550 | 0.009300 | 0.007500 | 0.006850 | 0.005950 | 0.005800 | 0.006400 | 0.005300 | 1.000000 |
| 75% | 0.035550 | 0.047950 | 0.057950 | 0.064500 | 0.100275 | 0.134125 | 0.154000 | 0.169600 | 0.233425 | 0.268700 | ... | 0.016725 | 0.014900 | 0.014500 | 0.012100 | 0.010575 | 0.010425 | 0.010350 | 0.010325 | 0.008525 | 1.000000 |
| max | 0.137100 | 0.233900 | 0.305900 | 0.426400 | 0.401000 | 0.382300 | 0.372900 | 0.459000 | 0.682800 | 0.710600 | ... | 0.070900 | 0.039000 | 0.035200 | 0.044700 | 0.039400 | 0.035500 | 0.044000 | 0.036400 | 0.043900 | 1.000000 |
8 rows × 61 columns
import plotly.express as px
# Box plots of all signal columns in one figure; the target column
# 'Material' is excluded.
signale = daten.drop(columns='Material')
achsen_labels = {'variable': 'Eigenschaft', 'value': 'Wert'}
fig = px.box(signale, title='Stein oder Metall', labels=achsen_labels)
fig.show()
# Count how many samples belong to each material class (computed once and
# reused for both the printout and the chart).
anzahl_pro_material = daten['Material'].value_counts()
print(anzahl_pro_material)

# Bar chart of the class counts. px.bar puts the Series index (the material
# class) on the x-axis and the counts on the y-axis, so the axes are titled
# accordingly.
fig = px.bar(anzahl_pro_material, title='Stein oder Metall')
fig.update_layout(
    xaxis_title='Material',
    yaxis_title='Anzahl',
    showlegend=False,
)
fig.show()
Material
1 111
0 97
Name: count, dtype: int64
from sklearn.model_selection import train_test_split
# Encode the target column as integers ('Stein' -> 0, 'Metall' -> 1) and
# print the distinct values as a check.
kodierung = {'Stein':'0', 'Metall':'1'}
daten['Material'] = daten['Material'].replace(kodierung).astype('int')
print(daten['Material'].unique())

# Separate the target (y) from the input features (X).
y = daten['Material']
X = daten.drop(columns='Material')

# Hold out 20 % of the samples as test set; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
[0 1]
from sklearn.tree import DecisionTreeClassifier
# Create a decision tree with default settings and train it on the training
# split (fit returns the fitted estimator itself).
# NOTE(review): no random_state is passed, so scores may differ between runs.
modell_baum = DecisionTreeClassifier().fit(X_train, y_train)

# Evaluate the accuracy on the training and on the test data.
score_train, score_test = (
    modell_baum.score(X_train, y_train),
    modell_baum.score(X_test, y_test),
)
print(f'Score Trainingsdaten Entscheidungsbaum: {score_train :.2f}')
print(f'Score Testdaten Entscheidungsbaum: {score_test :.2f}')
Score Trainingsdaten Entscheidungsbaum: 1.00
Score Testdaten Entscheidungsbaum: 0.67
from sklearn.ensemble import RandomForestClassifier
# Compare random forests of increasing size: for each number of trees, train
# a forest on the training split and report train/test accuracy.
for n in [1, 5, 10, 20, 50, 100]:
    # Create the random forest with n trees (fixed seed) and train it.
    modell_rf = RandomForestClassifier(n_estimators=n, random_state=0)
    modell_rf.fit(X_train, y_train)

    # Evaluate on training and test data.
    score_train = modell_rf.score(X_train, y_train)
    score_test = modell_rf.score(X_test, y_test)

    print(f'Anzahl Entscheidungsbäume: {n}')
    print(f'Score Training: {score_train :.2f} | Score Test: {score_test :.2f}')
    # Blank line to separate the reports of the individual forests.
    print('')
Anzahl Entscheidungsbäume: 1
Score Training: 0.86 | Score Test: 0.64
Anzahl Entscheidungsbäume: 5
Score Training: 0.97 | Score Test: 0.74
Anzahl Entscheidungsbäume: 10
Score Training: 0.99 | Score Test: 0.86
Anzahl Entscheidungsbäume: 20
Score Training: 1.00 | Score Test: 0.81
Anzahl Entscheidungsbäume: 50
Score Training: 1.00 | Score Test: 0.88
Anzahl Entscheidungsbäume: 100
Score Training: 1.00 | Score Test: 0.88
from sklearn.svm import SVC
# Create a support vector machine with a linear kernel and train it.
svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train, y_train)

# Evaluate the linear SVM on training and test data.
score_train = svm_linear.score(X_train, y_train)
score_test = svm_linear.score(X_test, y_test)
print(f'Score Trainingsdaten lineare SVM: {score_train :.2f}')
print(f'Score Testdaten lineare SVM: {score_test :.2f}')

# Create a non-linear support vector machine (RBF kernel) and train it.
svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(X_train, y_train)

# Evaluate the non-linear SVM; the labels name the non-linear model so its
# scores cannot be confused with those of the linear SVM printed above.
score_train = svm_rbf.score(X_train, y_train)
score_test = svm_rbf.score(X_test, y_test)
print(f'Score Trainingsdaten nichtlineare SVM: {score_train :.2f}')
print(f'Score Testdaten nichtlineare SVM: {score_test :.2f}')
Score Trainingsdaten lineare SVM: 0.86
Score Testdaten lineare SVM: 0.86
Score Trainingsdaten lineare SVM: 0.88
Score Testdaten lineare SVM: 0.79